In [1]:
import numpy as np
import pandas as pd
import matplotlib as mpl # used sparingly
import matplotlib.pyplot as plt
In [2]:
# Use the fully-qualified option names. The abbreviated forms depend on pandas'
# pattern matching of option keys and raise OptionError as soon as more than
# one registered option matches the abbreviation.
pd.set_option("display.notebook_repr_html", False)
pd.set_option("display.max_rows", 10)
In [3]:
%matplotlib inline
matplotlibrc
use
function (before importing pyplot)
from matplotlib import use
use('PS') # postscript
matplotlibrc
to change the matplotlib defaults
In [4]:
from matplotlib import matplotlib_fname
matplotlib_fname()
Out[4]:
You can also change them dynamically using the global rcParams object
In [5]:
from matplotlib import rcParams
In [6]:
rcParams.keys()
Out[6]:
In [7]:
rcParams['font.family']
Out[7]:
In [8]:
rcParams['font.family'] = 'monospace'
In [9]:
rcParams['font.family']
Out[9]:
In [10]:
rcParams['font.family'] = 'sans-serif'
You can also use the rc_context
context manager
In [11]:
from matplotlib import rc_context
In [12]:
with rc_context({'font.family': 'monospace'}):
print(rcParams['font.family'])
In [13]:
print(rcParams['font.family'])
show
import matplotlib.pyplot as plt
plt.ion()
plt.plot([1, 2, 3, 4, 5])
plt.title("Title")
matplotlibrc
import matplotlib.pyplot as plt
plt.ion()
or with
from matplotlib import interactive
interactive()
In [14]:
import matplotlib.pyplot as plt
In [15]:
plt.plot([1, 2, 3, 4])
plt.title("Title")
plt.xlabel("X")
Out[15]:
draw
or draw_if_interactive
to see changes
In [16]:
fig, ax = plt.subplots()
ax.plot([1, 2, 3, 4, 5])
ax.set_title("Title")
plt.draw_if_interactive()
plot
method takes x values, then y values
In [17]:
plt.plot([1, 5, 3])
Out[17]:
Function | Description |
---|---|
autoscale |
Autoscale the axis view to the data (toggle). |
axes |
Add an axes to the figure. |
axis |
Convenience method to get or set axis properties. |
cla |
Clear the current axes. |
clf |
Clear the current figure. |
clim |
Set the color limits of the current image. |
delaxes |
Remove an axes from the current figure. |
locator_params |
Control behavior of tick locators. |
margins |
Set or retrieve autoscaling margins. |
figure |
Creates a new figure. |
gca |
Return the current axis instance. |
gcf |
Return a reference to the current figure. |
gci |
Get the current colorable artist. |
hold |
Set the hold state. |
ioff |
Turn interactive mode off. |
ion |
Turn interactive mode on. |
ishold |
Return the hold status of the current axes. |
isinteractive |
Return status of interactive mode. |
rc |
Set the current rc params. |
rc_context |
Return a context manager for managing rc settings. |
rcdefaults |
Restore the default rc params. |
savefig |
Save the current figure. |
sca |
Set the current Axes instance. |
sci |
Set the current image. |
set_cmap |
Set the default colormap |
setp |
Set a property on an artist object |
show |
Display a figure |
subplot |
Return a subplot axes positioned by the given grid definition. |
subplot2grid |
Create a subplot in a grid. |
subplot_tool |
Launch a subplot tool window for a figure. |
subplots |
Create a figure with a set of subplots already made. |
subplots_adjust |
Tune the subplot layout. |
switch_backend |
Switch the default backend. |
tick_params |
Change the appearance of ticks and tick labels. |
ticklabel_format |
Change the ScalarFormatter used by default for linear axes. |
tight_layout |
Automatically adjust subplot parameters to give specified padding. |
xkcd |
Turns on XKCD sketch-style drawing mode. |
xlabel |
Set the x axis label of the current axis. |
xlim |
Get or set the x limits of the current axes. |
xscale |
Set the scaling of the x-axis. |
xticks |
Get or set the x-limits of the current tick locations and labels. |
ylabel |
Set the y axis label of the current axis. |
ylim |
Get or set the y-limits of the current axes. |
yscale |
Set the scaling of the y-axis. |
yticks |
Get or set the y-limits of the current tick locations and labels. |
Function | Description |
---|---|
acorr |
Plot the autocorrelation of x |
bar |
Make a bar plot |
barbs |
Plot a 2-D field of barbs |
barh |
Make a horizontal bar plot |
boxplot |
Make a box and whisker plot |
broken_barh |
Plot horizontal bars |
cohere |
Plot the coherence between x and y |
contour |
Plot contours |
contourf |
Plot filled contours |
csd |
Plot cross-spectral density |
errorbar |
Plot an errorbar graph |
eventplot |
Plot identical parallel lines at specific positions |
fill |
Plot filled polygons |
fill_between |
Make filled polygons between two curves |
fill_betweenx |
Make filled polygons between two horizontal curves |
hexbin |
Make a hexagonal binning plot |
hist |
Plot a histogram |
hist2d |
Make a 2D histogram plot |
imshow |
Display an image on the axes |
loglog |
Make a plot with log scaling on both the x and y axis |
matshow |
Display an array as a matrix in a new figure window |
pcolor |
Create a pseudocolor plot of a 2-D array |
pcolormesh |
Plot a quadrilateral mesh |
pie |
Plot a pie chart |
plot |
Plot lines and/or markers |
plot_date |
Plot with data with dates |
polar |
Make a polar plot |
psd |
Plot the power spectral density |
quiver |
Plot a 2-D field of arrows |
scatter |
Make a scatter plot of x vs y |
semilogx |
Make a plot with log scaling on the x axis |
semilogy |
Make a plot with log scaling on the y axis |
specgram |
Plot a spectrogram |
spy |
Plot the sparsity pattern on a 2-D array |
stackplot |
Draws a stacked area plot |
stem |
Create a stem plot |
step |
Make a step plot |
streamplot |
Draws streamlines of a vector flow |
tricontour |
Draw contours on an unstructured triangular grid |
tricontourf |
Draw filled contours on an unstructured triangular grid |
tripcolor |
Create a pseudocolor plot of an unstructured triangular grid |
triplot |
Draw a unstructured triangular grid as lines and/or markers |
xcorr |
Plot the cross-correlation between x and y |
Function | Description |
---|---|
annotate |
Create an annotation: a piece of text referring to a data point |
arrow |
Add an arrow to the axes |
axhline |
Add a horizontal line across the axis |
axhspan |
Add a horizontal span (rectangle) across the axis |
axvline |
Add a vertical line across the axes |
axvspan |
Add a vertical span (rectangle) across the axes |
box |
Turn the axes box on or off |
clabel |
Label a contour plot |
colorbar |
Add a colorbar to a plot |
grid |
Turn the axes grids on or off |
hlines |
Plot horizontal lines |
legend |
Place a legend on the current axes |
minorticks_off |
Remove minor ticks from the current plot |
minorticks_on |
Display minor ticks on the current plot |
quiverkey |
Add a key to a quiver plot |
rgrids |
Get or set the radial gridlines on a polar plot |
suptitle |
Add a centered title to the figure |
table |
Add a table to the current axes |
text |
Add text to the axes |
title |
Set a title of the current axes |
vlines |
Plot vertical lines |
xlabel |
Set the x axis label of the current axis |
ylabel |
Set the y axis label of the current axis |
Figure
is the central object of matplotlib
In [18]:
plt.Figure?
In [19]:
fig = plt.Figure()
In [20]:
plt.close()
plt.close('all')
is handy
figsize
, a tuple of integers specifying the width and height in inches
In [21]:
fig = plt.figure(figsize=(5, 5))
add_subplot
method to put an axes on a figure
In [22]:
fig = plt.figure()
ax = fig.add_subplot(111)
lines = ax.plot([1, 2, 3])
text = ax.set_xlabel("X")
In [23]:
fig = plt.figure(figsize=(10, 5))
ax1 = fig.add_subplot(121)
ax1.plot([1, 2, 3])
ax2 = fig.add_subplot(122)
ax2.plot([3, 2, 1])
Out[23]:
plt
for almost everything but figure creation
pyplot
namespace call gca
to get the current axis and then delegate to the method of the Axes object
In [24]:
plt.xlabel??
plt.subplots
plt
namespace with a few exceptions
In [25]:
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(np.random.randn(20), np.random.randn(20))
Out[25]:
Notebook aside
You can work on figures across cells. Just make the existing figure object the last line in the cell.
In [26]:
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(np.random.randn(20), np.random.randn(20))
Out[26]:
In [27]:
ax.scatter(np.random.randn(20), np.random.randn(20), color='r')
fig
Out[27]:
Exercise
Let's make some basic plots. Make a scatter plot as above with 500 points. Draw random numbers from 0 to 100 for the y axis and set the limits of the y axis at 0 and 200.
In [28]:
plt.plot?
".": point
",": pixel
"o": circle
"*": star
"+": plus
"x": x
"D": diamond
Exercises
Create a figure that holds two subplots in two rows. In the top one, plot a sin curve from $-2\pi$ to $2\pi$ in green. In the second one, plot a dashed red line (Hint: you may find np.linspace
to be useful).
In [29]:
x = np.linspace(-2*np.pi, 2*np.pi, 100)
In [30]:
y = np.sin(x)
In [31]:
plt.plot(x, y)
Out[31]:
Labeling lines allows automatic legend creation
In [32]:
fig, ax = plt.subplots(figsize=(8, 8))
ax.plot([1, 2, 4, 5], label="Line 1")
ax.plot([2, 5, 3, 4], label="Line 2")
legend = ax.legend(loc='best', fontsize=20)
You can label the X and Y axes
In [33]:
fig, ax = plt.subplots(figsize=(8, 8))
ax.plot([1, 2, 4, 5], label="Line 1")
ax.plot([2, 5, 3, 4], label="Line 2")
ax.set_xlabel("X", fontsize=20)
ax.set_ylabel("Y", fontsize=20)
legend = ax.legend(loc='best', fontsize=20)
Label the axes with a title
In [34]:
fig, ax = plt.subplots(figsize=(8, 8))
ax.plot([1, 2, 4, 5], label="Line 1")
ax.plot([2, 5, 3, 4], label="Line 2")
ax.set_xlabel("X", fontsize=20)
ax.set_ylabel("Y", fontsize=20)
ax.set_title("Title", fontsize=20)
legend = ax.legend(loc='best', fontsize=20)
tick_params
to adjust the appearance of the ticks
In [35]:
fig, ax = plt.subplots(figsize=(8, 8))
ax.grid(False)
ax.tick_params(axis='y', which='major', length=15, right=False)
ax.tick_params(axis='x', which='major', length=15, top=False, direction="out", pad=15)
You can set your own tick labels
In [36]:
fig, ax = plt.subplots(figsize=(8, 8))
ax.grid(False)
ax.tick_params(axis='y', which='major', length=15, right=False)
ax.tick_params(axis='x', which='major', length=15, top=False)
ticklabels = ax.xaxis.set_ticklabels(['aaaa', 'bbbb', 'cccc',
'dddd', 'eeee', 'ffff'],
rotation=45, fontsize=15)
The spines are the boundaries of the axes, and they can be selectively turned off
In [37]:
ax.spines
Out[37]:
In [38]:
fig, ax = plt.subplots(figsize=(8, 8))
ax.tick_params(bottom=False, top=False, left=False, right=False)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.grid(False)
ax.xaxis.set_ticklabels([])
ax.yaxis.set_ticklabels([]);
In [39]:
x, y = np.random.randn(2, 100)
x.sort()
In [40]:
fig, ax = plt.subplots()
ax.plot(y, 'g--')
Out[40]:
In [41]:
fig, ax = plt.subplots()
ax.plot(x, y)
Out[41]:
In [42]:
fig, ax = plt.subplots()
ax.plot(x, y, 'o')
Out[42]:
In [43]:
x2, y2 = np.random.randn(2, 200)
x2.sort()
In [44]:
fig, ax = plt.subplots()
lines = ax.plot(x, y, 'o', x2, y2, 'ro', ms=8, alpha=.5)
Series
and DataFrame
s have a plot method
They take a kind
keyword argument which accepts several values for plots other than the default line plot. These include:
bar
or barh
for bar plots
hist
for histogram
box
for boxplot
kde
or 'density' for density plots
area
for area plots
scatter
for scatter plots
hexbin
for hexagonal bin plots
pie
for pie plots
In [45]:
y = pd.Series(np.random.randn(25))
y.plot()
Out[45]:
In [46]:
y.cumsum().plot()
Out[46]:
AxesSubplot
objects, so we have our hook into all of the powerful methods from matplotlib
DataFrames
In [47]:
dta = pd.DataFrame({'normal': np.random.normal(size=100),
'gamma': np.random.gamma(1, size=100),
'poisson': np.random.poisson(size=100)})
ax = dta.cumsum(0).plot()
Exercise
Without re-plotting any of the above, re-size the fonts for the labels and the legend and display the figure.
In [48]:
ax = dta.cumsum(0).plot(subplots=True, figsize=(10, 10))
tight_layout
below
tight_layout
automatically adjusts the subplot params so that the subplot fits the figure
fig.subplots_adjust
In [49]:
axes = dta.cumsum(0).plot(subplots=True, figsize=(10, 10))
fig = axes[0].figure
fig.tight_layout()
In [50]:
axes = dta.cumsum().plot(secondary_y='normal')
In [51]:
fig, axes = plt.subplots(1, 3, figsize=(12, 4))
for i, ax in enumerate(axes):
variable = dta.columns[i]
ax = dta[variable].cumsum().plot(ax=ax)
ax.set_title(variable, fontsize=16)
axes[0].set_ylabel("Cumulative Sum", fontsize=14);
In [52]:
dta = pd.read_csv("../data/weather_nyc.csv")
In [ ]:
# .ix has been removed from pandas; boolean-mask row selection goes through .loc
dta = dta.loc[dta.year < 2015]  # truncate to end of year
Or equivalently
In [ ]:
dta.query("year < 2015")
Recall that pandas.cut
can be used to bin continuous data into buckets
In [ ]:
bins = [dta.temp.min(), 32, 55, 80, dta.temp.max()]
bins
In [ ]:
labels = ["freezing", "cold", "warm", "hot"]
dta["temp_bin"] = pd.cut(dta.temp, bins, labels=labels)
In [ ]:
try:
    from scipy.constants import F2C
except ImportError:  # scipy not available (any failed import of F2C lands here)
    def F2C(f):
        """Convert Fahrenheit temperature(s) to Celsius.

        `f` may be a scalar or an array-like; the result is the NumPy value
        of ``(f - 32) / 1.8``.
        """
        fahrenheit = np.array(f)
        return (fahrenheit - 32) / 1.8
In [ ]:
def lmap(func, x):
    """Apply `func` to every element of `x` and return the results as a list."""
    return [func(item) for item in x]
Celsius bins
In [ ]:
bins = [dta.tempc.min()] + lmap(F2C, (32, 55, 80)) + [dta.tempc.max()]
bins
In [ ]:
labels = ["freezing", "cold", "warm", "hot"]
# The edges in `bins` were just rebuilt in Celsius, so they must be applied to
# the Celsius column; cutting the Fahrenheit column `temp` against them would
# assign the wrong label to most rows.
dta["tempc_bin"] = pd.cut(dta.tempc, bins, labels=labels)
In [ ]:
dta.head()
In [ ]:
ax = dta.groupby("temp_bin").size().plot(kind="bar")
plot
method
Make the xtick labels bigger and rotate them
In [ ]:
ax = dta.groupby("temp_bin").size().plot(kind="bar", rot=0, fontsize=16, figsize=(8, 5))
ax.set_xlabel("Temperature")
ax.set_ylabel("Number of Days")
ax.set_title("Temperatures from 1995 - 2014");
Horizontal bar chart
In [ ]:
dta.groupby(["season", "temp_bin"]).size().plot(kind="barh", figsize=(6, 8))
Stacked bar chart
The pandas crosstab function creates a cross-tabulation of two or more factors.
In [ ]:
ct = pd.crosstab(dta.temp_bin, dta.season)
ct
In [ ]:
ax = ct.plot(kind="bar", stacked=True, figsize=(12, 8), grid=False,
legend=True)
In [ ]:
colors = plt.cm.Paired(np.linspace(0, 1, 4))
colors
In [ ]:
# Stacked bar chart of temperature bin by season, one color per season.
ax = pd.crosstab(dta.temp_bin, dta.season).plot(
    kind="bar", stacked=True, figsize=(12, 8), grid=False, legend=True,
    color=colors,  # the pandas plotting keyword is `color`; `colors` is not accepted
    rot=0, fontsize=16,
)
# adjust the fontsize of the legend entries and of the legend title
legend = ax.get_legend()
for text in legend.get_texts():
    text.set_fontsize(18)
legend.get_title().set_fontsize(20)
In [ ]:
dta.temp.min()
In [ ]:
ax = dta.temp.plot(kind="hist", bins=50)
It's even a good exercise here! Let's turn the -99 values into NaNs.
In [ ]:
# -99 is the missing-value sentinel in this data set; blank it out in both
# temperature columns. (.ix has been removed from pandas, so use .loc.)
dta.loc[dta.temp == -99, ["temp", "tempc"]] = np.nan
Incidentally, pandas will handle nulls in plotting
In [ ]:
ax = dta.temp.plot(kind="hist", bins=50, grid=False, figsize=(10, 6))
# plot a vertical line that spans the axis
line = ax.axvline(dta.temp.mean(), color='r', lw=3, label="Mean")
# specifically add a legend
handles, labels = ax.get_legend_handles_labels()
ax.legend([handles[0]], [labels[0]], fontsize=16)
In [ ]:
handles
In [ ]:
def scotts_rule(x):
    """Return the histogram bin width 3.5 * std / n**(1/3) for `x`.

    Missing values are dropped before the standard deviation and the
    observation count `n` are computed.
    """
    observed = x.dropna()
    n_obs = len(observed)
    spread = observed.std()
    return 3.5 * spread / (n_obs ** (1. / 3))
def width_to_nbins(x, h):
    """Convert a bin width `h` into a number of histogram bins for `x`.

    Missing values are dropped, then the data range (max - min) is divided
    by `h` and rounded to the nearest integer. At least one bin is always
    returned so the result can be passed straight to a histogram call.
    """
    observed = x.dropna()
    # Series.ptp() has been removed from pandas; compute the range explicitly.
    data_range = observed.max() - observed.min()
    return max(1, int(round(data_range / h)))
In [ ]:
h = scotts_rule(dta.temp)
nbins = width_to_nbins(dta.temp, h)
In [ ]:
ax = dta.temp.plot(kind="hist", bins=nbins, grid=False, figsize=(10, 6))
# plot a vertical line that spans the axis
line = ax.axvline(dta.temp.mean(), color='r', lw=3, label="Mean")
In [ ]:
ax = dta.temp.plot(kind='kde', grid=False, figsize=(10, 6))
ax.set_xlim(0, 100)
We can compare the KDE to the normed histogram
In [ ]:
# Draw the KDE first, then overlay the histogram on the same axes.
ax = dta.temp.plot(kind='kde', grid=False, figsize=(10, 6), color='r', lw=3)
# `density=True` rescales the histogram to unit area so it is comparable to the
# KDE; matplotlib's former `normed` keyword is no longer accepted.
ax = dta.temp.plot(kind="hist", bins=nbins, grid=False, figsize=(10, 6), ax=ax,
                   density=True, alpha=.7)
ax.set_xlim(0, 100)
Exercise
Create KDE estimates for the temperature in each season on a single plot. Label the plotted lines.
In [ ]:
ax = dta.boxplot(column="temp", by="season", grid=False, figsize=(8, 10), fontsize=16,
whis=[5, 95])
ax.set_title(ax.get_title(), fontsize=20)
ax.xaxis.get_label().set_fontsize(18)
fig = ax.figure
# Change the size of the figure title
# http://stackoverflow.com/a/12449783/535665
fig.texts[0].set_fontsize(20)
# whitespace between axes and fig boundary
fig.subplots_adjust(top=.85)
In [ ]:
def jitter(x, n, noise=.05):
    """Return `x` plus `n` draws of zero-mean normal noise.

    `noise` is the standard deviation of the perturbation; the result is an
    array of length `n`.
    """
    offsets = np.random.normal(loc=0, scale=noise, size=n)
    return x + offsets
In [ ]:
# Box plot of temperature by season with the raw observations overlaid as
# horizontally jittered points.
ax = dta.boxplot(column="temp", by="season", grid=False, figsize=(8, 10), fontsize=16,
                 whis=[5, 95])
ax.set_title(ax.get_title(), fontsize=20)
ax.xaxis.get_label().set_fontsize(18)
fig = ax.figure
# Change the size of the figure title
# http://stackoverflow.com/a/12449783/535665
fig.texts[0].set_fontsize(20)
# whitespace between axes and fig boundary
fig.subplots_adjust(top=.85)
for i, season in enumerate(ax.get_xticklabels()):
    # .ix has been removed from pandas; select the season's rows with .loc
    y = dta.loc[dta.season == season.get_text()].temp
    # the i-th tick label is paired with x position i + 1
    x = jitter(i + 1, len(y))
    # there's a lot of data so turn the alpha way down (or sub-sample)
    ax.plot(x, y, 'ro', alpha=.05)
In [ ]:
baseball = pd.read_csv("../data/baseball.csv")
In [ ]:
baseball.head()
In [ ]:
ax = baseball.plot(kind="scatter", x="ab", y="h", grid=False, figsize=(8, 6), s=8**2,
alpha=.7)
ax.margins(0)
ax.set_xlim(0, 700)
ax.set_ylim(0, 200)
In [ ]:
ax = baseball.plot(kind="scatter", x="ab", y="h", grid=False, figsize=(8, 6), s=baseball.hr*10,
alpha=.5)
ax.margins(0)
ax.set_xlim(0, 700)
ax.set_ylim(0, 200)
c
keyword
In [ ]:
ax = baseball.plot(kind="scatter", x="ab", y="h", grid=False, figsize=(8, 6), c="DarkGreen", s=50)
ax = baseball.plot(kind="scatter", x="ab", y="rbi", grid=False, figsize=(8, 6), c="Blue", s=50,
ax=ax)
ax.margins(0)
ax.set_xlim(0, 700)
ax.set_ylim(0, 200);
c
can also be a color intensity
cmap
keyword
In [ ]:
ax = baseball.plot(kind="scatter", x="ab", y="h", grid=False, figsize=(8, 6), c=baseball.hr*10,
s=40, cmap="hot")
ax.margins(0)
ax.set_xlim(0, 700)
ax.set_ylim(0, 200);
In [ ]:
ax = baseball.plot(kind="scatter", x="ab", y="h", grid=False, figsize=(8, 6), c=baseball.hr*10,
s=40, cmap="hot")
ax.margins(0)
ax.set_xlim(0, 700)
ax.set_ylim(0, 200)
fig = ax.figure
# colorbars are actually a separate subplot in your figure
colorbar = fig.axes[1]
colorbar.yaxis.set_tick_params(right=False);
pd.scatter_matrix
To view a large number of variables simultaneously
In [ ]:
# scatter_matrix lives in pandas.plotting; the top-level pd.scatter_matrix alias was removed
ax = pd.plotting.scatter_matrix(baseball.loc[:, 'r':'sb'], figsize=(14, 10), diagonal='hist')
In [ ]:
# scatter_matrix lives in pandas.plotting; the top-level pd.scatter_matrix alias was removed
ax = pd.plotting.scatter_matrix(baseball.loc[:, 'r':'sb'], figsize=(14, 10), diagonal='kde')
In [ ]:
idx = pd.to_datetime(dta.year*10000 + dta.month*100 + dta.day, format='%Y%m%d')
In [ ]:
idx
In [ ]:
y = dta.set_index(idx).temp
In [ ]:
y.head()
In [ ]:
y.index
In [ ]:
# Centered 60-observation rolling mean. pd.rolling_mean has been removed from
# pandas; the equivalent is the .rolling(...).mean() method chain.
# min_periods=1 yields a value even where the window is not yet full.
ax = y.rolling(window=60, min_periods=1, center=True).mean().plot(
    figsize=(12, 8), label="Rolling 2-month mean")
# One mean per calendar year, stamped on Dec 31 of that year so it can share
# the datetime x-axis with the rolling mean.
means = y.groupby(lambda x : x.year).mean()
means.index = pd.DatetimeIndex(pd.to_datetime(means.index * 10000 + 1231, format="%Y%m%d"))
ax = means.plot(ax=ax, label="Yearly Average")
legend = ax.legend()
plt.subplot2grid
is a helper function for creating grids of subplots
ax = plt.subplot(2, 2, 1)
subplot2grid
In [ ]:
ax = plt.subplot2grid((2, 2), (0, 0))
subplot2grid
for creating multiple subplots that span columns, for example
In [ ]:
with plt.rc_context(rc={"xtick.labelsize": 0,
"ytick.labelsize": 0,
"axes.facecolor": "lightgray",
"figure.figsize": (8, 8)}):
ax1 = plt.subplot2grid((3,3), (0,0), colspan=3)
ax2 = plt.subplot2grid((3,3), (1,0), colspan=2)
ax3 = plt.subplot2grid((3,3), (1, 2), rowspan=2)
ax4 = plt.subplot2grid((3,3), (2, 0))
ax5 = plt.subplot2grid((3,3), (2, 1))
ax1.figure.suptitle("subplot2grid", fontsize=20)
GridSpec
class directly to create the same plot
In [ ]:
from matplotlib.gridspec import GridSpec
with plt.rc_context(rc={"xtick.labelsize": 0,
                        "ytick.labelsize": 0,
                        "axes.facecolor": "lightgray"}):
    # Start from an empty figure. plt.subplots() would also create a
    # full-figure Axes that ends up underneath the GridSpec layout.
    fig = plt.figure(figsize=(8, 8))
    gs = GridSpec(3, 3)
    ax1 = plt.subplot(gs[0, :])
    # identical to ax1 = plt.subplot(gs.new_subplotspec((0,0), colspan=3))
    ax2 = plt.subplot(gs[1, :-1])
    ax3 = plt.subplot(gs[1:, -1])
    ax4 = plt.subplot(gs[-1, 0])
    ax5 = plt.subplot(gs[-1, -2])
    fig.suptitle("GridSpec", fontsize=20)
"At the heart of quantitative reasoning is a single question: Compared to what? Small multiple designs, multivariate and data bountiful, answer directly by visually enforcing comparisons of changes, of the differences among objects, of the scope of alternatives. For a wide range of problems in data presentation, small multiples are the best design solution."
-Edward Tufte
In [ ]:
import seaborn as sns
tips = sns.load_dataset("tips")
In [ ]:
tips.head()
In [ ]:
# Tip vs. total bill, one facet per sex, colored by smoker status.
with mpl.rc_context(rc={"legend.fontsize": "18", "axes.titlesize": "18"}):
    # seaborn renamed FacetGrid's `size` parameter to `height`
    g = sns.FacetGrid(tips, col="sex", hue="smoker", height=7)
    g.map(plt.scatter, "total_bill", "tip", alpha=.7, s=80)
    g.add_legend()
g._legend.get_title().set_fontsize(20)
# enlarge the title and x label on every facet in the (single) row
for facet_ax in g.axes[0]:
    facet_ax.title.set_fontsize(20)
    facet_ax.xaxis.get_label().set_fontsize(20)
The violin plot is a combination of a boxplot and a kernel density estimator
In [ ]:
ax = dta.boxplot(column="temp", by="season", grid=False, figsize=(8, 10), fontsize=16,
whis=[5, 95])
In [ ]:
X = dta[["temp", "season"]].dropna()
In [ ]:
# seaborn's violinplot takes the grouping variable as `x` and the values as `y`;
# the old `groupby` keyword is no longer accepted
ax = sns.violinplot(x=X.season, y=X.temp)
We can plot the points inside the violins and re-order the seasons
In [ ]:
# Grouping goes through `x`, values through `y` (the `groupby` keyword is gone).
# NOTE(review): `alpha` is only forwarded to the violin patches by recent
# seaborn releases -- confirm against the installed version.
ax = sns.violinplot(x=X.season, y=X.temp, inner='points', alpha=.5,
                    order=['Winter', 'Spring', 'Summer', 'Fall'])
In [ ]:
temp95 = dta.query("year == 1995")[["temp", "month", "day"]]
temp14 = dta.query("year == 2014")[["temp", "month", "day"]]
In [ ]:
temps = temp95.merge(temp14, on=["month", "day"], how="inner", suffixes=("_95", "_14"))
In [ ]:
# x/y must be passed by keyword, and `size` was renamed to `height` in seaborn
g = sns.jointplot(x=temps.temp_95, y=temps.temp_14, kind="kde", height=7, space=0)
We can also look at a hexbin plot of the same data with the marginal distributions as histograms.
In [ ]:
# x/y must be passed by keyword in current seaborn
g = sns.jointplot(x=temps.temp_95, y=temps.temp_14, kind="hex", color="#4CB391",
                  joint_kws={"bins": 200})
The mpld3 project brings together Matplotlib, and D3js, the popular Javascript library for creating interactive data visualizations for the web. The result is a simple API for exporting your matplotlib graphics to HTML code which can be used within the browser, within standard web pages, blogs, or tools such as the IPython notebook.
Let's look at a regular scatter plot
In [ ]:
fig, ax = plt.subplots(figsize=(6, 6))
np.random.seed(0)
x, y = np.random.normal(size=(2, 200))
color, size = np.random.random((2, 200))
ax.scatter(x, y, c=color, s=500 * size, alpha=0.5, cmap="rainbow")
ax.grid(color='lightgray', alpha=0.7)
Unfortunately, this is just a static image. Let's use mpld3 to change that. Using the display
command, you get a fully interactive visualization of the figure.
In [ ]:
import mpld3
mpld3.display(fig)
Notice the toolbar on hover. You can use that to interact with the figure.
You can use mpld3 for every plot that you render in the notebook by executing
mpld3.enable_notebook()
Much like event handling via callback functions in regular matplotlib (not covered in this notebook), you can define plugins for mpld3 to specify additional interactivity.
A number of plugins are built-in, and it is also possible to define new, custom plugins for nearly limitless interactive behaviors. For example, here is the built-in Linked Brushing plugin that allows exploration of multi-dimensional datasets:
In [ ]:
from mpld3 import plugins
fig, ax = plt.subplots(6, 6, figsize=(6, 6))
fig.subplots_adjust(hspace=0.1, wspace=0.1)
ax = ax[::-1]
X = baseball.loc[:, 'r':'rbi']
for i in range(6):
for j in range(6):
ax[i, j].xaxis.set_major_formatter(plt.NullFormatter())
ax[i, j].yaxis.set_major_formatter(plt.NullFormatter())
points = ax[i, j].scatter(X.values[:, j], X.values[:, i])
if i == 0:
ax[i, j].set_xlabel(X.columns[j])
ax[i, 0].set_ylabel(X.columns[i])
plugins.connect(fig, plugins.LinkedBrush(points))
mpld3.display(fig)
In [ ]:
from IPython.display import Image, HTML
# Image("./tufte.svg")
HTML("./tufte.svg")
In [ ]:
import os
def to_colors(x):
    """Scale a 0-255 channel value into the 0-1 range."""
    return x / 255.


# Each color is a list of three scaled channel values; the trailing comment
# gives the same color as a hex code.
blue3 = [to_colors(channel) for channel in (24, 116, 205)]  # 1874CD
wheat2 = [to_colors(channel) for channel in (238, 216, 174)]  # EED8AE
wheat3 = [to_colors(channel) for channel in (205, 186, 150)]  # CDBA96
wheat4 = [to_colors(channel) for channel in (139, 126, 102)]  # 8B7E66
firebrick3 = [to_colors(channel) for channel in (205, 38, 38)]  # CD2626
gray30 = [to_colors(channel) for channel in (77, 77, 77)]  # 4D4D4D
In [ ]:
idx = range(366)
ax.vlines
)wheat3
wheat4
present_highs
and present_lows
np.where
to be helpful)
In [ ]:
np.where([True, False, False, True, False])[0]
ax.annotate
)annotate
and arrows
ax.text
)
In [ ]:
# Tick positions every 10 units from -10 through 100, each labelled with a
# trailing degree sign.
yticks = range(-10, 101, 10)
degree_sign = u"\u00b0"
ylabels = []
for tick in yticks:
    ylabels.append(str(tick) + degree_sign)
ylabels
In [ ]:
with plt.xkcd():
    # Based on "Stove Ownership" from XKCD by Randall Munroe
    # http://xkcd.com/418/
    fig = plt.figure()
    ax = fig.add_axes((0.1, 0.2, 0.8, 0.7))
    # hide the top/right spines and all ticks for the hand-drawn look
    ax.spines['right'].set_color('none')
    ax.spines['top'].set_color('none')
    plt.xticks([])
    plt.yticks([])
    ax.set_ylim([-30, 10])

    # constant at 1 for the first 70 points, then a steady decline over the last 30
    data = np.ones(100)
    data[70:] -= np.arange(1, 31)

    plt.annotate(
        'THE DAY I REALIZED\nI COULD COOK BACON\nWHENEVER I WANTED',
        xy=(70, 1), arrowprops=dict(arrowstyle='->'), xytext=(15, -10), zorder=-1)

    plt.plot(data)

    plt.xlabel('time')
    plt.ylabel('my overall health')
    # attribution line (author's name is spelled "Munroe")
    fig.text(0.5, 0.05,
             '"Stove Ownership" from xkcd by Randall Munroe', ha='center')
In [ ]:
with plt.xkcd():
    # Based on "The data So Far" from XKCD by Randall Munroe
    # http://xkcd.com/373/
    fig = plt.figure()
    ax = fig.add_axes((0.1, 0.2, 0.8, 0.7))
    # two bars of width 0.25 with heights 0 and 100
    ax.bar([-0.125, 1.0-0.125], [0, 100], 0.25)
    ax.spines['right'].set_color('none')
    ax.spines['top'].set_color('none')
    ax.xaxis.set_ticks_position('bottom')
    ax.set_xticks([0, 1])
    ax.set_xlim([-0.5, 1.5])
    ax.set_ylim([0, 110])
    ax.set_xticklabels(['CONFIRMED BY\nEXPERIMENT', 'REFUTED BY\nEXPERIMENT'])
    ax.set_yticks([])

    fig.suptitle("CLAIMS OF SUPERNATURAL POWERS")

    # attribution line (author's name is spelled "Munroe")
    fig.text(0.5, 0.01,
             '"The Data So Far" from xkcd by Randall Munroe',
             ha='center', )
In [ ]:
from matplotlib.ticker import MaxNLocator
In [ ]:
x = np.arange(20)
y = np.random.randn(20)
In [ ]:
fig, ax = plt.subplots()
ax.plot(x, y)
ax.xaxis.set_major_locator(MaxNLocator(nbins=8))
In [ ]:
x = np.arange(20)
y1 = np.random.randn(20)
y2 = np.random.randn(20)
In [ ]:
fig, axes = plt.subplots(2, 1, sharex=True)
axes[0].plot(x, y1)
axes[1].plot(x, y2)
fig.tight_layout()
In [ ]:
t = np.arange(0.01, 10.0, 0.01)
s1 = np.exp(t)
s2 = np.sin(2*np.pi*t)
In [ ]:
fig, ax1 = plt.subplots()
ax1.plot(t, s1, 'b-')
ax1.set_xlabel('time (s)')
# Make the y-axis label and tick labels match the line color.
ax1.set_ylabel('exp', color='b', fontsize=18)
for tl in ax1.get_yticklabels():
tl.set_color('b')
ax2 = ax1.twinx()
ax2.plot(t, s2, 'r.')
ax2.set_ylabel('sin', color='r', fontsize=18)
for tl in ax2.get_yticklabels():
tl.set_color('r')
In [ ]:
fig, ax = plt.subplots()
ax.imshow(np.random.uniform(0, 1, size=(50, 50)), cmap="RdYlGn")
text.usetex
option
In [ ]:
fig, ax = plt.subplots()
ax.set_ylabel("$\\beta^2$", fontsize=20, rotation=0, labelpad=20)
In [ ]:
with mpl.rc_context(rc={"text.usetex": True}):
fig, ax = plt.subplots(figsize=(5, 5))
ax.set_ylabel("$\\beta^2$", fontsize=20, rotation=0, labelpad=20)
In [ ]:
def bivariate_normal(X, Y, sigmax=1.0, sigmay=1.0, mux=0.0, muy=0.0, sigmaxy=0.0):
    """Evaluate a bivariate Gaussian density at the points (X, Y).

    Local replacement for ``matplotlib.pylab.bivariate_normal``, which is no
    longer shipped with matplotlib. The parameters follow the same order:
    standard deviations, means, then the covariance `sigmaxy`.
    """
    x_centered = X - mux
    y_centered = Y - muy
    rho = sigmaxy / (sigmax * sigmay)
    z = (x_centered**2 / sigmax**2 + y_centered**2 / sigmay**2
         - 2 * rho * x_centered * y_centered / (sigmax * sigmay))
    denom = 2 * np.pi * sigmax * sigmay * np.sqrt(1 - rho**2)
    return np.exp(-z / (2 * (1 - rho**2))) / denom


np.random.seed(12)
delta = 0.025
x = np.arange(-3.0, 3.0, delta)
y = np.arange(-2.0, 2.0, delta)
X, Y = np.meshgrid(x, y)
Z1 = bivariate_normal(X, Y, 1.0, 1.0, 0.0, 0.0)
Z2 = bivariate_normal(X, Y, 1.5, 0.5, 1, 1)
# difference of Gaussians
Z = 10.0 * (Z2 - Z1)
In [ ]:
with mpl.rc_context(rc={'xtick.direction': 'out',
'ytick.direction': 'out'}):
# Create a simple contour plot with labels using default colors. The
# inline argument to clabel will control whether the labels are draw
# over the line segments of the contour, removing the lines beneath
# the label
fig, ax = plt.subplots(figsize=(8, 8))
contours = ax.contour(X, Y, Z)
ax.clabel(contours, inline=1, fontsize=10)
In [ ]:
fig, ax = plt.subplots()
ax.arrow(0, 0, 0.5, 0.5, head_width=0.05, head_length=0.1, fc='k', ec='k')
ax.arrow(0.25, 0, 0.5, 0.5, head_width=0, head_length=0, fc='k', ec='k')
In [ ]:
x = np.arange(0.0, 2, 0.01)
y1 = np.sin(2*np.pi*x)
y2 = 1.2*np.sin(4*np.pi*x)
In [ ]:
fig, axes = plt.subplots(3, 1, sharex=True, figsize=(6, 10))
axes[0].fill_between(x, 0, y1)
axes[0].set_ylabel('between y1 and 0')
axes[1].fill_between(x, y1, 1)
axes[1].set_ylabel('between y1 and 1')